In [1]:
import pandas as pd
import geopandas as gpd
# from jupyterthemes import jtplot
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
import basedosdados as bd
import sqlite3 as sqlite
In [2]:
pio.renderers.default='notebook'
In [3]:
dbpath = 'cities.db' 

# Create connection and load spatialite extension
con = sqlite.connect(dbpath)
con.enable_load_extension(True)
con.execute('SELECT load_extension("mod_spatialite")')

# SQL must wrap the geometry in hex(st_asbinary(...))
sql = "SELECT id, sigla_uf, name, Hex(ST_AsBinary(geometry)) as geom FROM cities;"
df = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="geom")
In [4]:
life_exp = bd.read_sql(
    '''
    SELECT id_municipio AS id, expectativa_vida
    FROM  basedosdados.mundo_onu_adh.municipio
    WHERE ano = 2010
    ''',
    billing_project_id='adameplayground')
Downloading: 100%|██████████| 5565/5565 [00:00<00:00, 9568.45rows/s]
In [5]:
cities_health = pd.merge(df, life_exp, how='inner', on='id')
In [6]:
rj_health = cities_health[cities_health['sigla_uf'] == 'RJ']
In [7]:
rj_health
Out[7]:
id sigla_uf name geom expectativa_vida
2350 3300100 RJ Angra dos Reis POLYGON ((-44.19557 -23.09831, -44.19445 -23.1... 75.75
2351 3300233 RJ Armação dos Búzios POLYGON ((-41.93436 -22.76580, -41.91900 -22.7... 74.44
2352 3300258 RJ Arraial do Cabo POLYGON ((-42.23767 -22.86367, -42.23403 -22.8... 73.31
2353 3300308 RJ Barra do Piraí POLYGON ((-43.97101 -22.32489, -43.96619 -22.3... 74.14
2354 3300456 RJ Belford Roxo POLYGON ((-43.36512 -22.68504, -43.35048 -22.6... 73.50
... ... ... ... ... ...
5253 3305901 RJ Trajano de Moraes POLYGON ((-42.15535 -22.00962, -42.14765 -22.0... 73.77
5254 3306008 RJ Três Rios POLYGON ((-43.14554 -22.09725, -43.13612 -22.1... 73.03
5255 3306156 RJ Varre-Sai POLYGON ((-41.75527 -20.80706, -41.75000 -20.8... 73.58
5256 3306206 RJ Vassouras POLYGON ((-43.45624 -22.21610, -43.43097 -22.2... 73.79
5257 3306305 RJ Volta Redonda POLYGON ((-44.02940 -22.51965, -44.03532 -22.5... 74.98

92 rows × 5 columns

In [8]:
rj_health.plot(column ='expectativa_vida',figsize= (10,10))
Out[8]:
<AxesSubplot:>
In [9]:
#rj_health.to_csv('rj_health.gzip', compression = 'gzip')
In [10]:
#gdf[gdf['sigla_uf']== 'RJ'].plot(figsize= (10,10),cmap='tab20b')
In [11]:
rj_health['id'] = rj_health['id'].astype(int)
rj_health
Out[11]:
id sigla_uf name geom expectativa_vida
2350 3300100 RJ Angra dos Reis POLYGON ((-44.19557 -23.09831, -44.19445 -23.1... 75.75
2351 3300233 RJ Armação dos Búzios POLYGON ((-41.93436 -22.76580, -41.91900 -22.7... 74.44
2352 3300258 RJ Arraial do Cabo POLYGON ((-42.23767 -22.86367, -42.23403 -22.8... 73.31
2353 3300308 RJ Barra do Piraí POLYGON ((-43.97101 -22.32489, -43.96619 -22.3... 74.14
2354 3300456 RJ Belford Roxo POLYGON ((-43.36512 -22.68504, -43.35048 -22.6... 73.50
... ... ... ... ... ...
5253 3305901 RJ Trajano de Moraes POLYGON ((-42.15535 -22.00962, -42.14765 -22.0... 73.77
5254 3306008 RJ Três Rios POLYGON ((-43.14554 -22.09725, -43.13612 -22.1... 73.03
5255 3306156 RJ Varre-Sai POLYGON ((-41.75527 -20.80706, -41.75000 -20.8... 73.58
5256 3306206 RJ Vassouras POLYGON ((-43.45624 -22.21610, -43.43097 -22.2... 73.79
5257 3306305 RJ Volta Redonda POLYGON ((-44.02940 -22.51965, -44.03532 -22.5... 74.98

92 rows × 5 columns

In [12]:
fig = px.choropleth(rj_health,
                   geojson = rj_health["geom"],
                   locations = rj_health.index,
                   color = "expectativa_vida",
                   hover_name = "name",
                   hover_data = ["expectativa_vida"],#, "death_rate"],
                   labels = {
                       "expectativa_vida": "Life Expectancy"
                   },
                   color_continuous_scale = "sunsetdark",
                   range_color=(71, 77),
                   #animation_frame = 'date',
)

fig.update_geos(
                projection=dict(
                    scale=55
                    ),
                center = dict(
                    lat=-22.208333,
                    lon=-42.896388
                    )
)

fig.show()
In [13]:
covid = pd.read_csv('caso_full.csv.gz', compression='gzip')
In [14]:
filt = (covid['state']  == 'RJ') & (covid['place_type'] == 'city') & (covid['is_last']) 
uniq = covid[filt]['city'].isin(rj_health['name'])
rj_covid = covid[filt][uniq]
In [15]:
rj_covid['id'] = rj_covid['city_ibge_code'].astype(int)
rj_covid = rj_covid.copy()[['id', 'estimated_population', 'last_available_confirmed_per_100k_inhabitants', 'last_available_death_rate']]
rj_covid
Out[15]:
id estimated_population last_available_confirmed_per_100k_inhabitants last_available_death_rate
2061595 3300100 207044.0 8288.57634 0.0315
2061596 3300159 11901.0 10360.47391 0.0251
2061597 3300209 134293.0 5493.95724 0.0656
2061598 3300225 12669.0 13071.27634 0.0308
2061599 3300233 34477.0 16480.55225 0.0128
... ... ... ... ...
2061683 3306008 82142.0 13096.83231 0.0255
2061684 3306107 76869.0 7495.86960 0.0354
2061685 3306156 11106.0 25742.84171 0.0024
2061686 3306206 37083.0 8351.53574 0.0436
2061687 3306305 273988.0 13918.12780 0.0322

92 rows × 4 columns

In [16]:
health_covid_rj = pd.merge(rj_health, rj_covid, how='inner', on='id')
health_covid_rj
Out[16]:
id sigla_uf name geom expectativa_vida estimated_population last_available_confirmed_per_100k_inhabitants last_available_death_rate
0 3300100 RJ Angra dos Reis POLYGON ((-44.19557 -23.09831, -44.19445 -23.1... 75.75 207044.0 8288.57634 0.0315
1 3300233 RJ Armação dos Búzios POLYGON ((-41.93436 -22.76580, -41.91900 -22.7... 74.44 34477.0 16480.55225 0.0128
2 3300258 RJ Arraial do Cabo POLYGON ((-42.23767 -22.86367, -42.23403 -22.8... 73.31 30593.0 3010.49260 0.0478
3 3300308 RJ Barra do Piraí POLYGON ((-43.97101 -22.32489, -43.96619 -22.3... 74.14 100764.0 7481.83875 0.0517
4 3300456 RJ Belford Roxo POLYGON ((-43.36512 -22.68504, -43.35048 -22.6... 73.50 513118.0 4634.02180 0.0386
... ... ... ... ... ... ... ... ...
87 3305901 RJ Trajano de Moraes POLYGON ((-42.15535 -22.00962, -42.14765 -22.0... 73.77 10640.0 8637.21805 0.0054
88 3306008 RJ Três Rios POLYGON ((-43.14554 -22.09725, -43.13612 -22.1... 73.03 82142.0 13096.83231 0.0255
89 3306156 RJ Varre-Sai POLYGON ((-41.75527 -20.80706, -41.75000 -20.8... 73.58 11106.0 25742.84171 0.0024
90 3306206 RJ Vassouras POLYGON ((-43.45624 -22.21610, -43.43097 -22.2... 73.79 37083.0 8351.53574 0.0436
91 3306305 RJ Volta Redonda POLYGON ((-44.02940 -22.51965, -44.03532 -22.5... 74.98 273988.0 13918.12780 0.0322

92 rows × 8 columns

In [17]:
#health_covid_rj.to_csv('health_covid_rj.gzip', compression = 'gzip')
In [ ]:
 
In [18]:
fig = px.choropleth(health_covid_rj,
                   geojson = health_covid_rj['geom'],
                   locations = health_covid_rj.index,
                   color = "last_available_death_rate",
                   hover_name = "name",
                   hover_data = ["expectativa_vida", "last_available_death_rate"],
                   labels = {
                       "expectativa_vida": "Life Expectancy",
                       "last_available_death_rate": "Death Rate"
                   },
                   color_continuous_scale = "sunsetdark",
                   #range_color=(71, 77),
                   #animation_frame = 'date',
)

fig.update_geos(
                projection=dict(
                    scale=55
                    ),
                center = dict(
                    lat=-22.208333,
                    lon=-42.896388
                    )
)

fig.show()
In [19]:
fig = px.scatter(health_covid_rj, x="last_available_death_rate", y="expectativa_vida", hover_data=['name'], trendline="ols")
fig.show()
In [27]:
!jupyter nbconvert health_analysis.ipynb --to html --template classic